In [1]:
%pylab inline
import warnings
warnings.filterwarnings("ignore")
import nolearn
from nolearn.lasagne import NeuralNet
from progress_bar import ProgressBar
import createdata 
import lasagne
from lasagne import layers
from sklearn import metrics
import detectobjects as det


Populating the interactive namespace from numpy and matplotlib

In [2]:
opts = {'img_dir': '../data/intestinalparasites_Images/',
        'models_dir': '../models/',
        'annotation_dir': '../data/intestinalparasites_annotation/',
        'train-dir': 'train_dir/',
        'test-dir': 'test_dir/',
        'val-dir': 'val_dir/',
        'patches_dir': 'patches_dir/',
        'augment-training-data': False,
        'model': '2C-1FC-O',
        'threshold': 0.9, 
        'overlapThreshold': 0.3, 
        'lim': 0, 
        'gauss': 1,
        'prob': det.non_maximum_suppression, 
        'pos': det.non_maximum_suppression, 
        'probs_area': 90,
        'input_scale': None,
        'raw_scale': 255,
        'image_dims': (600,600),
        'image_downsample' : 10,
        'channel_swap': None,
        'probs_area': 40,
        'detection-step': 10,
        'patch-creation-step': 40,
        'object-class': 'hookworm',
        'negative-training-discard-rate': .9
       }
opts['patch_stride_training'] = int(opts['image_dims'][0]*.5)

In [3]:
reload(createdata)
trainfiles, valfiles, testfiles = createdata.create_sets(opts['img_dir'], train_set_proportion=.6, 
                                                  test_set_proportion=.39,
                                                  val_set_proportion=.01)

train_y, train_X = createdata.create_patches(trainfiles, opts['annotation_dir'], opts['img_dir'],
opts['image_dims'][0], opts['patch_stride_training'], grayscale=False, progressbar=True, downsample=opts['image_downsample'], 
objectclass=opts['object-class'], negative_discard_rate=opts['negative-training-discard-rate'])

test_y, test_X = createdata.create_patches(testfiles,  opts['annotation_dir'], opts['img_dir'], 
opts['image_dims'][0], opts['patch_stride_training'], grayscale=False, progressbar=True, downsample=opts['image_downsample'], 
objectclass=opts['object-class'], negative_discard_rate=opts['negative-training-discard-rate'])

val_y, val_X = createdata.create_patches(valfiles, opts['annotation_dir'], opts['img_dir'], 
opts['image_dims'][0], opts['patch_stride_training'], grayscale=False, progressbar=True, downsample=opts['image_downsample'], 
objectclass=opts['object-class'], negative_discard_rate=opts['negative-training-discard-rate'])

# For training/validation, cut down on disproportionately large numbers of negative patches
train_X, train_y = createdata.balance(train_X, train_y, mult_neg=100)
val_X, val_y = createdata.balance(val_X, val_y, mult_neg=100)

# Create rotated and flipped versions of the positive patches
train_X, train_y = createdata.augment_positives(train_X, train_y)
val_X, val_y = createdata.augment_positives(val_X, val_y)
test_X, test_y = createdata.augment_positives(test_X, test_y)


[****************100%******************]  3 of 3 complete

In [54]:
'%d positive training examples, %d negative training examples' % (sum(train_y), len(train_y)-sum(train_y))


Out[54]:
'128 positive training examples, 267 negative training examples'

In [55]:
print '%d positive testing examples, %d negative testing examples' % (sum(test_y), len(test_y)-sum(test_y))


128 positive testing examples, 158 negative testing examples

In [56]:
print '%d patches (%.1f%% positive)' % (len(train_y)+len(test_y), 100.*((sum(train_y)+sum(test_y))/(len(train_y)+len(test_y))))


681 patches (0.0% positive)
View a random selection of positive and negative patches to see if they look right

In [ ]:


In [12]:
N_samples_to_display = 10
pos_indices = np.where(train_y)[0]
pos_indices = pos_indices[np.random.permutation(len(pos_indices))]
for i in range(N_samples_to_display):
    plt.subplot(2,N_samples_to_display,i+1)
    example_pos = train_X[pos_indices[i],:,:,:]
    example_pos = np.swapaxes(example_pos,0,2)
    plt.imshow(example_pos[:,:,[2,1,0]])
    
neg_indices = np.where(train_y==0)[0]
neg_indices = neg_indices[np.random.permutation(len(neg_indices))]
for i in range(N_samples_to_display,2*N_samples_to_display):
    plt.subplot(2,N_samples_to_display,i+1)
    example_neg = train_X[neg_indices[i],:,:,:]
    example_neg = np.swapaxes(example_neg,0,2)
    plt.imshow(example_neg[:,:,[2,1,0]])
    
plt.gcf().set_size_inches(1.5*N_samples_to_display,3)


CNN training

In [13]:
def CNN(n_epochs):
    net1 = NeuralNet(
        layers=[
        ('input', layers.InputLayer),
        ('conv1', layers.Conv2DLayer),      #Convolutional layer.  Params defined below
        ('pool1', layers.MaxPool2DLayer),   # Like downsampling, for execution speed
        ('conv2', layers.Conv2DLayer),
        ('hidden3', layers.DenseLayer),
        ('output', layers.DenseLayer),
        ],
        
    input_shape=(None, 3, opts['image_dims'][0]/opts['image_downsample'], 
                 opts['image_dims'][0]/opts['image_downsample']),
    conv1_num_filters=7, 
    conv1_filter_size=(5, 5), 
    conv1_nonlinearity=lasagne.nonlinearities.rectify,
        
    pool1_pool_size=(2, 2),
        
    conv2_num_filters=12, 
    conv2_filter_size=(2, 2),    
    conv2_nonlinearity=lasagne.nonlinearities.rectify,
        
    hidden3_num_units=50,
    output_num_units=2, 
    output_nonlinearity=lasagne.nonlinearities.softmax,

    update_learning_rate=0.0001,
    update_momentum=0.9,

    max_epochs=n_epochs,
    verbose=1,
    )
    return net1

cnn = CNN(50).fit(train_X, train_y)


# Neural Network with 438432 learnable parameters

## Layer information

  #  name     size
---  -------  --------
  0  input    3x60x60
  1  conv1    7x56x56
  2  pool1    7x28x28
  3  conv2    12x27x27
  4  hidden3  50
  5  output   2

  epoch    trn loss    val loss    trn/val    valid acc  dur
-------  ----------  ----------  ---------  -----------  -----
      1    43.89158     9.55640    4.59290      0.38235  1.53s
      2     5.15587     0.62822    8.20714      0.57353  1.35s
      3     0.73944     0.58266    1.26908      0.60294  1.38s
      4     0.61494     0.54080    1.13709      0.66176  1.40s
      5     0.56353     0.51759    1.08877      0.69118  1.38s
      6     0.54864     0.50707    1.08198      0.69118  1.41s
      7     0.53621     0.50152    1.06917      0.69118  1.35s
      8     0.52400     0.49731    1.05366      0.69118  1.35s
      9     0.51401     0.49399    1.04053      0.69118  1.40s
     10     0.50611     0.49089    1.03101      0.69118  1.37s
     11     0.49902     0.48796    1.02265      0.69118  1.37s
     12     0.49205     0.48520    1.01411      0.70588  1.38s
     13     0.48508     0.48265    1.00503      0.70588  1.39s
     14     0.47853     0.48033    0.99625      0.70588  1.35s
     15     0.47219     0.47816    0.98752      0.70588  1.37s
     16     0.46638     0.47626    0.97926      0.70588  1.37s
     17     0.46110     0.47456    0.97165      0.70588  1.38s
     18     0.45608     0.47309    0.96405      0.70588  1.38s
     19     0.45127     0.47177    0.95655      0.69118  1.38s
     20     0.44678     0.47053    0.94953      0.69118  1.38s
     21     0.44267     0.46922    0.94341      0.70588  1.35s
     22     0.43877     0.46796    0.93761      0.72059  1.44s
     23     0.43500     0.46666    0.93215      0.72059  1.37s
     24     0.43133     0.46537    0.92686      0.70588  1.36s
     25     0.42810     0.46428    0.92207      0.72059  1.37s
     26     0.42521     0.46323    0.91792      0.72059  1.38s
     27     0.42268     0.46253    0.91385      0.72059  1.40s
     28     0.42045     0.46159    0.91089      0.70588  1.39s
     29     0.41847     0.46057    0.90859      0.70588  1.38s
     30     0.41668     0.45957    0.90668      0.70588  1.34s
     31     0.41499     0.45864    0.90482      0.70588  1.38s
     32     0.41343     0.45778    0.90311      0.70588  1.37s
     33     0.41196     0.45700    0.90143      0.70588  1.35s
     34     0.41057     0.45628    0.89982      0.70588  1.37s
     35     0.40928     0.45559    0.89835      0.70588  1.34s
     36     0.40808     0.45502    0.89685      0.70588  1.37s
     37     0.40698     0.45444    0.89557      0.70588  1.38s
     38     0.40596     0.45392    0.89435      0.70588  1.47s
     39     0.40503     0.45340    0.89331      0.70588  1.37s
     40     0.40418     0.45288    0.89248      0.70588  1.33s
     41     0.40342     0.45237    0.89178      0.70588  1.37s
     42     0.40272     0.45187    0.89122      0.70588  1.38s
     43     0.40207     0.45137    0.89078      0.70588  1.38s
     44     0.40149     0.45089    0.89045      0.70588  1.33s
     45     0.40095     0.45042    0.89017      0.70588  1.37s
     46     0.40045     0.44998    0.88992      0.70588  1.36s
     47     0.39998     0.44956    0.88971      0.70588  1.38s
     48     0.39954     0.44916    0.88953      0.70588  1.37s
     49     0.39912     0.44878    0.88934      0.70588  1.35s
     50     0.39872     0.44845    0.88911      0.70588  1.37s

Make predictions and evaluate on test data


In [14]:
y_pred = cnn.predict_proba(test_X)

In [15]:
false_positive_rate, true_positive_rate, thresholds = metrics.roc_curve(test_y, y_pred[:,1])
roc_auc = metrics.auc(false_positive_rate, true_positive_rate)
plt.title('Receiver Operating Characteristic: AUC = %0.2f'% roc_auc)
plt.plot(false_positive_rate, true_positive_rate, 'b')
plt.legend(loc='lower right')
plt.plot([0,1],[0,1],'r--')
plt.ylim([-.05, 1.05])
plt.xlim([-.05, 1.0])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()



In [16]:
false_positive_rate, true_positive_rate, thresholds = metrics.roc_curve(test_y, y_pred[:,1])
true_positive_rate.shape, thresholds.shape
plt.plot(true_positive_rate, thresholds,label='False positive rate')
plt.plot(false_positive_rate, thresholds, label='True positive rate')
plt.xlabel('Threshold')
plt.legend(loc='upper left')


Out[16]:
<matplotlib.legend.Legend at 0xa97fb7ec>

In [17]:
precision, recall, thresholds = metrics.precision_recall_curve(test_y, y_pred[:,1])
average_precision = metrics.average_precision_score(test_y, y_pred[:, 1])

plt.plot(recall, precision)
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.title('Precision-Recall: AUC={0:0.2f}'.format(average_precision))
plt.legend(loc="lower left")


Examine mistakes to understand network performance - false positives

Find the negative-labelled patches with highest prediction score


In [18]:
neg_indices = np.where(test_y==0)[0]
neg_scores = y_pred[neg_indices,1]
neg_indices = neg_indices[neg_scores.argsort()]
neg_indices = neg_indices[::-1]

neg_scores = y_pred[neg_indices,1]
neg_scores

N_samples_to_display = 10

for i in range(N_samples_to_display,2*N_samples_to_display):
    plt.subplot(2,N_samples_to_display,i+1)
    example_neg = test_X[neg_indices[i],:,:,:]
    example_neg = np.swapaxes(example_neg,0,2)
    plt.imshow(example_neg[:,:,[2,1,0]])

print neg_scores[:N_samples_to_display]

plt.gcf().set_size_inches(1.5*N_samples_to_display,3)


[ 0.51415054  0.5072187   0.50292046  0.50292046  0.50292046  0.50292046
  0.50292046  0.50292046  0.50292046  0.50292046]

See highest-scored test patches


In [19]:
prob_range = [.9,1.]

tmp_scores = y_pred.copy()[:,1]
tmp_scores[tmp_scores<prob_range[0]] = -1
tmp_scores[tmp_scores>prob_range[1]] = -1

pos_indices = tmp_scores.argsort()
pos_indices = pos_indices[::-1]

N_samples_to_display = 10

for i in range(N_samples_to_display,2*N_samples_to_display):
    plt.subplot(2,N_samples_to_display,i+1)
    example_neg = test_X[pos_indices[i],:,:,:]
    example_neg = np.swapaxes(example_neg,0,2)
    plt.imshow(example_neg[:,:,[2,1,0]])

plt.gcf().set_size_inches(1.5*N_samples_to_display,3)


Lowest scored test patches

Object detection in an entire field of view


In [20]:
imfile = opts['img_dir'] + testfiles[5]
%timeit found = det.detect(imfile, cnn, opts)


/home/lubega/env/lib/python2.7/site-packages/skimage/filters/_gaussian.py:13: skimage_deprecation: Call to deprecated function ``gaussian_filter``. Use ``skimage.filters.gaussian`` instead.
  multichannel=None):
/home/lubega/env/lib/python2.7/site-packages/skimage/filters/_gaussian.py:13: skimage_deprecation: Call to deprecated function ``gaussian_filter``. Use ``skimage.filters.gaussian`` instead.
  multichannel=None):
/home/lubega/env/lib/python2.7/site-packages/skimage/filters/_gaussian.py:13: skimage_deprecation: Call to deprecated function ``gaussian_filter``. Use ``skimage.filters.gaussian`` instead.
  multichannel=None):
/home/lubega/env/lib/python2.7/site-packages/skimage/filters/_gaussian.py:13: skimage_deprecation: Call to deprecated function ``gaussian_filter``. Use ``skimage.filters.gaussian`` instead.
  multichannel=None):
1 loops, best of 3: 7.95 s per loop

In [21]:
testfiles[5]


Out[21]:
'intestinalparasites-phone-0003.jpg'

In [22]:
import cv2
im = cv2.imread(imfile)

plt.box(False)
plt.xticks([])
plt.yticks([])

for f in det.detect(imfile, cnn, opts):
    f = f.astype(int)
    cv2.rectangle(im, (f[0], f[1]), (f[2],f[3]), (0,0,255), 8)
    
plt.gcf().set_size_inches(10,10)
plt.title('Detected objects in %s' % (imfile))
plt.imshow(im[:,:,[2,1,0]])


/home/lubega/env/lib/python2.7/site-packages/skimage/filters/_gaussian.py:13: skimage_deprecation: Call to deprecated function ``gaussian_filter``. Use ``skimage.filters.gaussian`` instead.
  multichannel=None):
Out[22]:
<matplotlib.image.AxesImage at 0xa97aa72c>

In [31]:


In [ ]: